/*******************************************************************************

This code file produces Figure A19, "Do Differences in Neighborhood Characteristics Explain Cost Differences?"

*******************************************************************************/

*** Manage settings

	run "$dir/code/modules/settings.do"
	
	* Set match variable
	local matchvars "lmedhhinc lmedgrossrent sh_poor sh_mtcoll sh_white"
	local n_boot "200"
		
	* Load Stata-TeX

	do "$code/modules/stata-tex.do"
	cd "$tables/estimates"
		
	* Save blank tempfile
	
	set obs 1
	
	gen matchvar = ""
	gen acost = .
	
	tempfile acost_tmp
	save `acost_tmp'
	
/*	
	
********************************************************************************
* Reweighting analysis
********************************************************************************

foreach matchvar in `matchvars' {
		
	*** Tenant-Based Section 8 

		import delimited "$data/raw/OtherHousingAssistance/Section8/hudPicture2015_360148.csv", encoding(ISO-8859-1) clear
		
		gen borough = substr(code,1,5)
		destring borough, replace force
		
		replace borough = 1 if borough == 36061
		replace borough = 2 if borough == 36005
		replace borough = 3 if borough == 36047
		replace borough = 4 if borough == 36081 
		replace borough = 5 if borough == 36085
		
		gen ct2010 = substr(code,6,6)
		destring ct2010, replace force
		
		drop if borough > 5
		
		rename reported assisted_unit_count
		rename averagehudexpenditurepermonth hudcost
		
		collapse (sum) assisted_unit_count (mean) hudcost, by(borough ct2010)
		
		merge 1:m borough ct2010 using "$data/clean/census_area_characteristics.dta", keep(2 3) nogen
		
		bys borough ct2010: egen sum_units = sum(ct_occ_renter_units*sh_poor)
		gen sh_units = (ct_occ_renter_units*sh_poor) / sum_units
		replace assisted_unit_count = assisted_unit_count * sh_units
		drop sum_units sh_units
		
		gen sh_mtcoll = sh_coll + sh_postgrad
		
		gcollapse (sum) assist (mean) `matchvar' hudcost, by(borough ct2010 cb2010)
		
		rename assisted_unit_count assisted_unit_count_tbsection8
		
		tempfile assisted_tbsection8
		save `assisted_tbsection8', replace

	*** LIHTC

		* Obtain distribution of desired match variable
		
		use "$data/clean/non_421a_units.dta", clear
		keep if bldg == 2
				
		merge m:1 borough ct2010 cb2010 using "$data/clean/census_area_characteristics.dta", keep(2 3) nogen

		gen sh_mtcoll = sh_coll + sh_postgrad
		
		gcollapse (sum) assisted_unit_count (mean) `matchvar', by(borough ct2010 cb2010)
		
		merge 1:1 borough ct2010 cb2010 using `assisted_tbsection8', nogen
		replace assisted_unit_count = assisted_unit_count + assisted_unit_count_tbsection8
		drop assisted_unit_count_tbsection8
		
		tempfile assisted_tbsection8_lihtc
		save `assisted_tbsection8_lihtc', replace

	*** 421-a units	

		use "$data/clean/cleaned_data.dta", clear
					
		tempfile bldg_weights
		save `bldg_weights'
		
		merge m:1 borough ct2010 cb2010 using "$data/clean/census_area_characteristics.dta", keep(2 3) nogen
		
		gen sh_mtcoll = sh_coll + sh_postgrad
		
		gcollapse (sum) unitsres (mean) `matchvar' if inclusionary_onsite == 1, by(borough ct2010 cb2010)
		
		tempfile units_421a
		save `units_421a', replace
		
	*** Construct rescaling factor

		* Merge together
		merge 1:1 borough ct2010 cb2010 using `assisted_tbsection8_lihtc', nogen
		
		kdens `matchvar' [aw=unitsres], gen(wt_421a) at(`matchvar') adaptive nograph
		kdens `matchvar' [aw=assisted_unit_count], gen(wt_assisted) at(`matchvar') adaptive nograph
		
		gen reweight = wt_assisted / wt_421a 
				
		keep borough ct2010 cb2010 reweight
		
	*** Apply rescaling factor to building weights

		merge 1:m borough ct2010 cb2010 using `bldg_weights', nogen keep(2 3)
			
	*** Estimate average cost

		do "$code/modules/bootstrap_programs.do"
	
		xtset, clear
		bootstrap tmp=r(tmp), reps(`n_boot') cluster(nta) idcluster(nta_boot): boot_acost_rw

		matrix mat_acost = r(table)
		local acost_estimate = mat_acost[1,1]
		local acost_stderr = mat_acost[2,1]
		
		* Save for table
		quietly cd "$tables/estimates"
		insert_into_file using analysis_t6.csv, key(acost_`matchvar'_est) value(`acost_estimate') format(%12.0fc)
		insert_into_file using analysis_t6.csv, key(acost_`matchvar'_stderr) value(`acost_stderr') format(%12.0fc)
		di "`matchvar': `acost_estimate' (`acost_stderr')"
		
		* Save for data
			
		gen matchvar = "`matchvar'"
		gen acost_estimate = `acost_estimate'
		gen acost_stderr = `acost_stderr'
		
		keep matchvar acost_estimate acost_stderr
			
		append using `acost_tmp'
		quietly save `acost_tmp', replace
			
}

*/

********************************************************************************
* Dinardo-Fortin-Lemieux reweighting
********************************************************************************

	*** Tenant-Based Section 8 

		import delimited "$data/raw/OtherHousingAssistance/Section8/hudPicture2015_360148.csv", encoding(ISO-8859-1) clear
		
		gen borough = substr(code,1,5)
		destring borough, replace force
		
		replace borough = 1 if borough == 36061
		replace borough = 2 if borough == 36005
		replace borough = 3 if borough == 36047
		replace borough = 4 if borough == 36081 
		replace borough = 5 if borough == 36085
		
		gen ct2010 = substr(code,6,6)
		destring ct2010, replace force
		
		drop if borough > 5
		
		rename reported assisted_unit_count
		rename averagehudexpenditurepermonth hudcost
		
		collapse (sum) assisted_unit_count (mean) hudcost, by(borough ct2010)
		
		merge 1:m borough ct2010 using "$data/clean/census_area_characteristics.dta", keep(2 3) nogen
		
		bys borough ct2010: egen sum_units = sum(ct_occ_renter_units*sh_poor)
		gen sh_units = (ct_occ_renter_units*sh_poor) / sum_units
		replace assisted_unit_count = assisted_unit_count * sh_units
		drop sum_units sh_units
		
		gen sh_mtcoll = sh_coll + sh_postgrad
		
		gcollapse (sum) assist (mean) $controls_block hudcost, by(borough ct2010 cb2010)
		
		rename assisted_unit_count assisted_unit_count_tbsection8
		
		tempfile assisted_tbsection8
		save `assisted_tbsection8', replace

	*** LIHTC

		* Obtain distribution of desired match variable
		
		use "$data/clean/non_421a_units.dta", clear
		keep if bldg == 2
				
		merge m:1 borough ct2010 cb2010 using "$data/clean/census_area_characteristics.dta", keep(2 3) nogen

		gen sh_mtcoll = sh_coll + sh_postgrad
		
		gcollapse (sum) assisted_unit_count (mean) $controls_block, by(borough ct2010 cb2010)
		
		merge 1:1 borough ct2010 cb2010 using `assisted_tbsection8', nogen
		replace assisted_unit_count = assisted_unit_count + assisted_unit_count_tbsection8
		drop assisted_unit_count_tbsection8
		
		tempfile assisted_tbsection8_lihtc
		save `assisted_tbsection8_lihtc', replace

	*** 421-a units	

		use "$data/clean/cleaned_data.dta", clear
					
		tempfile bldg_weights
		save `bldg_weights'
		
		merge m:1 borough ct2010 cb2010 using "$data/clean/census_area_characteristics.dta", keep(2 3) nogen
		
		gen sh_mtcoll = sh_coll + sh_postgrad
		
		gcollapse (sum) unitsres (mean) $controls_block if inclusionary_onsite == 1, by(borough ct2010 cb2010)
		
		tempfile units_421a
		save `units_421a', replace
		
	*** Construct rescaling factor

		* Merge together
		merge 1:1 borough ct2010 cb2010 using `assisted_tbsection8_lihtc', nogen
		
		
		replace unitsres = 0 if missing(unitsres)
		replace assisted = 0 if missing(assisted)
		gen sh_421a = unitsres/(unitsres+assisted_unit_count)
		gen n_units = unitsres+assisted_unit_count
		fracreg logit sh_421a $controls_block [pw=n_units]
				
		predict sh_421a_pred, cm
		
		gen reweight = (1-sh_421a_pred)/sh_421a_pred
		
		keep borough ct2010 cb2010 reweight
		
		merge 1:m borough ct2010 cb2010 using `bldg_weights', nogen keep(2 3)
			
	*** Estimate average cost

		do "$code/modules/bootstrap_programs.do"
			
		xtset, clear
		bootstrap tmp=r(tmp), reps(`n_boot') seed(1234) cluster(nta) idcluster(nta_boot): boot_acost_rw
		
		matrix mat_acost = r(table)
		local acost_estimate = mat_acost[1,1]
		local acost_stderr = mat_acost[2,1]
	
		* Save for data
			
		gen matchvar = "DFL"
		gen acost_estimate = `acost_estimate'
		gen acost_stderr = `acost_stderr'
		
		keep matchvar acost_estimate acost_stderr
			
		append using `acost_tmp'
		quietly save `acost_tmp', replace

********************************************************************************
* Add other benchmarks
********************************************************************************

*** Unadjusted average cost of 421a units

	use "$data/clean/cleaned_data.dta", clear
	
	do "$code/modules/bootstrap_programs.do"
	
	xtset, clear
	bootstrap tmp=r(tmp), reps(`n_boot') seed(1234) cluster(nta) idcluster(nta_boot): boot_acost

	matrix mat_acost = r(table)
	local acost_estimate = mat_acost[1,1]
	local acost_stderr = mat_acost[2,1]
	
	* Save for data
	
	gen matchvar = "421-a"
	gen acost_estimate = `acost_estimate'
	gen acost_stderr = `acost_stderr'
	keep matchvar acost_estimate acost_stderr
	
	append using `acost_tmp'
	quietly save `acost_tmp', replace
	
*** Tenant-Based Section 8 cost

	import delimited "$data/raw/OtherHousingAssistance/Section8/hudPicture2015_360148.csv", encoding(ISO-8859-1) clear
	
	gen borough = substr(code,1,5)
	destring borough, replace force
	
	replace borough = 1 if borough == 36061
	replace borough = 2 if borough == 36005
	replace borough = 3 if borough == 36047
	replace borough = 4 if borough == 36081 
	replace borough = 5 if borough == 36085
	
	drop if borough > 5
	
	rename reported occ_units
	rename averagehudexpenditurepermonth hud_exp_monthly

	keep hud_exp_monthly occ_units
	replace hud_exp_monthly = . if hud_exp_monthly < 0
	gen hud_exp = 12*hud_exp_monthly / 0.05
	
	summ hud_exp [aw=occ_units]
	local acost = r(mean)
	
	gen matchvar = "Section 8"
	gen acost_estimate = `acost'
	
	keep matchvar acost_estimate
	keep if _n == 1
	
	append using `acost_tmp'
	quietly save `acost_tmp', replace
	
*** LIHTC
	
	* Note: 9870 = GAO @ 5% annualized rate for TDC, 70% fiscal cost
	* Note: 10234 = Abt @ 5% annualized rate for TDC, 70% fiscal cost, from reg model (mid atlantic, principal city, high construction wages)
	* Note: 11729 = from 2015 max TDC, averaging over bedrooms, for elevator buildings

	clear
	
	set obs 1
	
	gen matchvar = "LIHTC"
	gen acost_estimate = 11000 / 0.05
	
	append using `acost_tmp'
	quietly save `acost_tmp', replace
	
********************************************************************************
* Make plots
********************************************************************************
	
	use `acost_tmp', clear	
	
	duplicates drop
	
	format %6.0fc acost_estimate 
	
	gen order = .
	replace order = 1 if matchvar == "LIHTC"
	replace order = 2 if matchvar == "Section 8"
	replace order = 3 if matchvar == "DFL"
	replace order = 4 if matchvar == "421-a" 

	replace matchvar = "Reweighted 421-a" if matchvar == "DFL"
	
	labmask order, values(matchvar)
	
	replace acost_estimate = acost_estimate / 1000000
	replace acost_stderr = acost_stderr / 1000000
	
	gen lo = acost_estimate - 1.96*acost_stderr
	gen hi = acost_estimate + 1.96*acost_stderr
	
	format acost_estimate %8.2f
		
	tw (scatter order acost_estimate, msize(large)) || (rcap hi lo order, horizontal lcolor(navy)), ///
		ylabel(1 2 3 4, valuelabel angle(0) labsize(large)) ytitle("") xtitle("Average Fiscal Cost Per Unit ($ Millions)", size(large)) ///
		graphregion(color(white)) legend(off) yscale(range(0.5 4.5)) ///
		xscale(range(0 1)) xlabel(0(0.25)1, labsize(large)) ysize(2.5)

	graph export "$figs/reweighting_result.pdf", replace
	graph export "$figs_overleaf/reweighting_result.pdf", replace
